import os
import tqdm
import seaborn as sns
import scanpy as sc
import anndata
import squidpy as sq
import decoupler as dc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
plt.rcdefaults()
sc.set_figure_params()
plt.rcParams['axes.grid'] = False
plt.rcParams['figure.figsize'] = (5,5)
from distance import calc_neighborhood,calc_intraneighborhood
from deconvolution import calc_leiden_on_deconv,get_dense_clusters,calc_utest
from run_decoupler import calc_all,get_activities,get_activities_per_group,get_geneset_pairs
from plot_funcs import *
all plot_funcs functions are available via
import plot_funcs
dir(plot_funcs)
adata=sc.read_h5ad('/sc/arion/projects/HIMC/reference/single_cell_references/CRC_Merad_lab/visium.h5ad')
adata
deconv=pd.read_csv('/sc/arion/projects/HIMC/reference/single_cell_references/CRC_Merad_lab/visium_deconv.csv',index_col=0)
deconv.head(3)
calculate neighborhood
ad=calc_neighborhood(adata,'pathologist_annotation','Tumor',5,'samples',
['MIME22_BIC31-B4_0','MIME22_BIC31B6-V2_0'],True)
plot neighborhood or any other .obs object for several samples using plot_spatial_scatter_n_neighborhood or plot_spatial_scatter for AnnData with one sample
plot_spatial_scatter_zeros(ad,'samples',None,
*['neighborhood_0_MIME22_BIC31-B4_0','neighborhood_1_MIME22_BIC31-B4_0','neighborhood_2_thin_MIME22_BIC31-B4_0',
'neighborhood_0_MIME22_BIC31B6-V2_0','neighborhood_1_MIME22_BIC31B6-V2_0','neighborhood_2_thin_MIME22_BIC31B6-V2_0',
'pathologist_annotation'])
plot neghborhood composition for several samples using plot_neighborhood_composition_n_samples or plot_neighborhood_composition for AnnData with one sample
plot_neighborhood_composition_n_samples(ad,5,deconv,'samples',['MIME22_BIC31-B4_0','MIME22_BIC31B6-V2_0'],True)
# how to choose one sample
samples=['MIME22_BIC21-A6_0']
adata_test=adata[adata.obs['samples'].isin(samples)]
adata_test.uns['spatial']={s:adata_test.uns['spatial'][s] for s,v in adata_test.uns['spatial'].items() if s in samples}
deconv_test=deconv.reindex(adata_test.obs_names)
samples=['MIME22_BIC21-A6_0']
adata_test=adata[adata.obs['samples'].isin(samples)]
adata_test.uns['spatial']={s:adata_test.uns['spatial'][s] for s,v in adata_test.uns['spatial'].items() if s in samples}
deconv_test=deconv.reindex(adata_test.obs_names)
adata_test=calc_neighborhood(adata_test,'pathologist_annotation','Tumor',1)
plot_spatial_scatter(adata_test,'samples',None,
*['neighborhood_0','neighborhood_1',
'pathologist_annotation'])
plot_neighborhood_composition(adata_test,1,deconv,True)
intra-neighbors are spots of nth layer of area of interest itself
n=5
s=['MIME22_BIC18-A5_0','MIME22_BIC18-A7_0']
ad=calc_intraneighborhood(adata,'pathologist_annotation','Tumor',n,'samples',s)
script will stop as soon as you reach "core"
plot_spatial_scatter_zeros(ad,'samples',s,
*['pathologist_annotation',
*[f'neighborhood_{i}_intra_MIME22_BIC18-A5_0' for i in range(4)],
*[f'neighborhood_{i}_intra_MIME22_BIC18-A7_0' for i in range(5)]
])
plot_neighborhood_composition_n_samples(ad,5,deconv,'samples',s,False,True)
and you can assess intra-neighborhood in one sample
samples=['MIME22_BIC21-A6_0']
adata_test=adata[adata.obs['samples'].isin(samples)]
adata_test.uns['spatial']={s:adata_test.uns['spatial'][s] for s,v in adata_test.uns['spatial'].items() if s in samples}
deconv_test=deconv.reindex(adata_test.obs_names)
adata_test=calc_intraneighborhood(adata_test,'pathologist_annotation','Tumor',3)
plot_spatial_scatter(adata_test,'samples',None,
*[*[f'neighborhood_{i}_intra' for i in range(4)],
'pathologist_annotation'])
plot_neighborhood_composition(adata_test,3,deconv,False,None,True)
right now leiden resolution is chosen by number of clusters (should be 9<=n<=11)
samples=['MIME22_BIC18-A5_0']
adata_test=adata[adata.obs['samples'].isin(samples)]
adata_test.uns['spatial']={s:adata_test.uns['spatial'][s] for s,v in adata_test.uns['spatial'].items() if s in samples}
deconv_test=deconv.reindex(adata_test.obs_names)
# adata_deconv=calc_leiden_on_deconv(adata,deconv,'samples',None,pre_computed_adata=False)
adata_deconv=calc_leiden_on_deconv(adata_test,deconv_test,None,None,pre_computed_adata=False)
plot results for any .obs keys -- here: leiden clusters on gene expression matrix/deconvolution and pathologist annotation on specified samples
toplot=['leiden_deconv_MIME22_BIC21-A6_0','leiden_MIME22_BIC21-A6_0',
'leiden_deconv_MIME22_BIC31B6-V2_0','leiden_MIME22_BIC31B6-V2_0',
'pathologist_annotation']
s=['MIME22_BIC31B6-V2_0','MIME22_BIC21-A6_0']
plot_spatial_scatter_zeros(adata_deconv,'samples',s,*toplot)
you can plot composition of whatever cluster whatever group you want.
here it is a all clusters in leiden_deconv
plot_clusters_composition(adata_deconv,'leiden_deconv',deconv_test)
and then go deep and apply DBSCAN on whatever cluster you want
adata_deconv=get_dense_clusters(adata_deconv,'leiden_deconv','4')
toplot=['leiden_deconv',
'denselabels',
'pathologist_annotation']
plot_spatial_scatter(adata_deconv,None,samples,*toplot)
plot_clusters_composition(adata_deconv,'denselabels',deconv_test)
you can calculate U-statistics for one immune aggregate vs other for every cell subtype and report adjusted p-values
r=calc_utest(adata_test,deconv_test,'denselabels',mcorr=True)
r=r.astype(float).dropna(how='all',axis=1)
_,axs=plt.subplots(1,1,figsize=(.4*len(r.columns),2))
sns.heatmap(r,ax=axs,cmap='coolwarm')
also you can calculate cell-cell interaction inside clusters and compare top expressed receptor-ligand pairs
plot_rl_pairs(adata_deconv,'denselabels',True,True,genes_heatmap=['ANXA1','CLU'])
calculate progeny, dorothea and msigdb collections on all spots. sometimes it crashes with kernel restarting error -- it means that you need more RAM. 8 gb per node is ok
ad_dc=calc_all(adata,msigdb_collection=['kegg_pathways','reactome_pathways'],
ora=True,precomp=False)
gs=get_geneset_pairs(adata,['kegg_pathways','reactome_pathways'])
get activities and group activities for any decoupler calculation. group them and plot on UMAP
acts=get_activities(ad_dc,'reactome_pathways_ora_estimate')
acts_gr=get_activities_per_group(acts,'pathologist_annotation')
clustermap_of_activities(acts_gr.iloc[:,:30])
plot_spatial_scatter_decoupler(acts,'samples',['MIME22_BIC18-A5_0','MIME22_BIC18-A7_0'],
*['pathologist_annotation',*['REACTOME_ABC_TRANSPORTER_DISORDERS','REACTOME_APOPTOSIS_INDUCED_DNA_FRAGMENTATION']])
dc_df=pd.concat([ad_dc.obsm[i] for i in ad_dc.obsm if 'estimate' in i],axis=1).replace({np.inf:0})
adata_deconv.obsm['decoupler']=dc_df.reindex(adata_deconv.obs_names)
acts=get_activities(adata_deconv,'decoupler')
# acts_gr=get_activities_per_group(acts,'pathologist_annotation')
you can calculate most important features for any group in any .obs name
sc.tl.rank_genes_groups(acts,groupby='leiden_deconv')
for example first cluster in leiden clustering calculated on deconvolution is definitely enriched in myofibroblasts
gr='1'
masked=\
acts.uns['rank_genes_groups']['names'][gr][\
acts.uns['rank_genes_groups']['pvals_adj'][gr]<0.05]
tohighlight=masked[:5]
print(tohighlight)
plot_spatial_scatter_decoupler(acts,None,None,
*['leiden_deconv',*tohighlight])
check expression of genes in specified geneset
plot_hm_gs(acts,adata_deconv,'SRF',gs)